library(ggplot2)
library(forcats)
library(dplyr)
library(tidyr)
library(lme4)
library(GGally)
library(lattice)
library(INLA)
library(gstat)
library(fields)
library(reshape)
library(rgdal)
library(rgeos)
library(gridExtra)
library(plotly)
#Loading dataset
#Topo lidar data#
topo_df <- read.csv("data/lidar_final_spatialjoin.csv") %>%
select(-ID_sectie, -join_KRIBV, -join_KD, -join_KD_nr, -join_Opp_m, -distance) #delete redundant data
nrow(topo_df)
## [1] 7890
colnames(topo_df)
## [1] "gridcell" "marram_san" "mean_diff" "sd_diff" "n"
## [6] "mean_15" "mean_19" "X" "Y" "dist_HW"
## [11] "join_ID"
#data for H (joint count, H, of Marram)
jc_df <- read.csv("data/BE_joincount_2.csv") %>%
filter(CLASS==6) %>%#filter the marram-values; marram is class 6
select(STD_DEVIATE, gridcell)
#data for P (proportion of Marram)
p_df <- read.csv("data/BE_classtat_2.csv") %>%
filter(class==6) %>%#filter the marram-values; marram is class 6
select(prop.landscape, gridcell)
#joining the datasets
vegetation <- topo_df %>% left_join(jc_df, by="gridcell") %>%
left_join(p_df, by="gridcell")
#filter only cells with marram_sand_prop > 0.75 and delete NA's
vegetation <- vegetation %>% filter(marram_san >= 0.75) %>%
drop_na()
#write.csv(vegetation_sel, "filtered_lidar_final.csv") #to check spatially the filtered data in QGIS
#renaming the data
vegetation <- vegetation %>%
rename(H=STD_DEVIATE, P=prop.landscape, ID_sectie=join_ID)
#standardizing covariates P, H #P is between 0 and 1 so ok
vegetation <- vegetation %>% mutate(Pstd=P) %>%
mutate(Hstd=scale(H, center=mean(H), scale=sd(H))) %>%
mutate(Diststd=scale(dist_HW, center=mean(dist_HW), scale=sd(dist_HW))) %>%
#mutating X and Y into km
mutate(Xkm = X/1000, Ykm=Y/1000)
head(vegetation)
## gridcell marram_san mean_diff sd_diff n mean_15 mean_19 X
## 1 22270199210 0.9538462 0.13518560 0.2257488 361 8.526061 8.661247 22270
## 2 22290199170 0.9910714 0.19333333 0.2213239 228 8.560675 8.754009 22290
## 3 22290199210 0.9673721 0.38110803 0.2862125 361 7.362194 7.743302 22290
## 4 22290199230 1.0000000 0.21843767 0.3680388 361 6.815493 7.033931 22290
## 5 22310199170 0.9374486 0.32186981 0.2088671 361 8.841568 9.163438 22310
## 6 22310199190 0.8368000 0.08634167 0.1714334 360 9.330414 9.416756 22310
## Y dist_HW ID_sectie H P Pstd Hstd
## 1 199210 77.50226 1 8.086036 0.12307692 0.12307692 -3.8419514
## 2 199170 121.15867 1 11.560220 0.02678571 0.02678571 -3.5182369
## 3 199210 82.35297 2 55.103303 0.19841270 0.19841270 0.5389819
## 4 199230 62.95012 2 55.159987 0.02640000 0.02640000 0.5442635
## 5 199170 126.00939 2 57.770901 0.19300412 0.19300412 0.7875410
## 6 199190 106.66227 2 59.892914 0.34440000 0.34440000 0.9852641
## Diststd Xkm Ykm
## 1 -0.79638864 22.27 199.21
## 2 0.09358064 22.29 199.17
## 3 -0.69750316 22.29 199.21
## 4 -1.09304506 22.29 199.23
## 5 0.19246612 22.31 199.17
## 6 -0.20193953 22.31 199.19
nrow(vegetation)
## [1] 2157
#data for sand suppletion#
#get suppletion data
suppletions <- read.csv("data/volumes_droogstrand.csv", sep=";") %>%
#only get values from 2015-2019
filter(survey.datum %in% c("17/05/2015", "10/04/2016", "26/05/2017", "17/04/2018", "20/04/2019")) %>%
gather(ID_sectie, dV, X1:X266) %>%
group_by(ID_sectie) %>% summarise(meandV=mean(dV)) %>% #calculate average of dV
separate(ID_sectie, into=c("X", "ID_sectie"), sep="X") %>%
mutate(ID_sectie=as.numeric(ID_sectie)) %>%
select(-X) %>% arrange(ID_sectie)
## `summarise()` ungrouping output (override with `.groups` argument)
head(suppletions)
## # A tibble: 6 x 2
## ID_sectie meandV
## <dbl> <dbl>
## 1 1 -4.18
## 2 2 -1.22
## 3 3 -2.4
## 4 4 -2
## 5 5 1.84
## 6 6 0.4
vegetation <- vegetation %>% left_join(suppletions, by="ID_sectie") %>% #get suppletion value into dataset
mutate(meandVstd = scale(meandV)) #standardize it
#Data exploration
#1. Outliers
#Cleveland dotcharts see Zuur et al 2010 doi: 10.1111/j.2041-210X.2009.00001.x
par(mfrow = c(1,4))
dotchart(vegetation$P, main="P")
dotchart(vegetation$H, main="H")
dotchart(vegetation$mean_diff, main="mean_diff") #maybe two outlier in mean_diff on the left
dotchart(vegetation$sd_diff, main="sd_diff")
dotchart(vegetation$meandV, main="dV")
#2. Multicollinearity between covariates
#correlation
par(mfrow=c(1,1))
plot(Hstd~Pstd, data=vegetation) #-->bit of a parabolic relation?
plot(H~P, data=vegetation)
# myVars <- vegetation[,c("Hstd", "Pstd")]
# ggpairs(myVars)
#3. Relationships Y~X
par(mfrow=c(2,2))
plot(mean_diff~Hstd, data=vegetation)#centered around zero
plot(mean_diff~Pstd, data=vegetation)#centered around zero
plot(sd_diff~Hstd, data=vegetation)#parabolic?
plot(sd_diff~Pstd, data=vegetation)#maybe linear, depends on noise
plot(mean_diff~meandVstd, data=vegetation)
plot(sd_diff~meandVstd, data=vegetation)
plot(mean_diff~H, data=vegetation)
plot(mean_diff~P, data=vegetation)
#4. spatial plot
xyplot(Ykm~Xkm, data=vegetation,
aspect = "iso", col = 1, pch = 16)
#5. which kind of regression?
par(mfrow=c(1,1))
hist(vegetation$mean_diff, breaks=seq(-15,6,0.5) ) #Gaussian
hist(vegetation$sd_diff, breaks=seq(0,6,0.1) ) #gamma GLM with log-link, for >0 data
#Non-spatial model with INLA
#for mean_diff
f0_diff <- mean_diff ~ Pstd + Hstd + Pstd:Hstd +
I(Pstd^2) + I(Hstd^2) +Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd
M0_diff <- inla(f0_diff, control.compute = list(dic = TRUE, waic = TRUE),
family = "gaussian", data = vegetation)
summary(M0_diff)
##
## Call:
## c("inla(formula = f0_diff, family = \"gaussian\", data = vegetation, ", " control.compute = list(dic = TRUE, waic = TRUE))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 1.7733 2.9435 0.9301 5.6470
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) 0.3407 0.0231 0.2954 0.3407 0.3860 0.3407 0
## Pstd 0.0895 0.1403 -0.1860 0.0895 0.3649 0.0895 0
## Hstd 0.0292 0.0241 -0.0181 0.0292 0.0765 0.0292 0
## I(Pstd^2) -0.4275 0.1615 -0.7447 -0.4275 -0.1106 -0.4275 0
## I(Hstd^2) -0.0037 0.0078 -0.0190 -0.0037 0.0115 -0.0037 0
## Diststd -0.1490 0.0102 -0.1691 -0.1490 -0.1291 -0.1490 0
## meandVstd 0.0841 0.0097 0.0651 0.0841 0.1032 0.0841 0
## Pstd:Hstd -0.2026 0.1341 -0.4659 -0.2026 0.0604 -0.2026 0
## Hstd:I(Pstd^2) 0.0711 0.1556 -0.2344 0.0711 0.3763 0.0711 0
## Pstd:I(Hstd^2) -0.0339 0.0206 -0.0744 -0.0339 0.0065 -0.0339 0
##
## The model has no random effects
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 5.407 0.1627 5.093 5.406
## 0.975quant mode
## Precision for the Gaussian observations 5.732 5.402
##
## Expected number of effective parameters(std dev): 10.07(0.0022)
## Number of equivalent replicates : 214.17
##
## Deviance Information Criterion (DIC) ...............: 2495.01
## Deviance Information Criterion (DIC, saturated) ....: 2171.12
## Effective number of parameters .....................: 11.11
##
## Watanabe-Akaike information criterion (WAIC) ...: 2501.18
## Effective number of parameters .................: 16.92
##
## Marginal log-Likelihood: -1315.55
## Posterior marginals for linear predictor and fitted values computed
#for sd_diff
f0_err <- sd_diff ~ Pstd + Hstd + Pstd:Hstd +
I(Pstd^2) + I(Hstd^2) +Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd
M0_err <- inla(f0_err, control.compute = list(dic = TRUE, waic = TRUE),
#verbose=TRUE,
family = "gamma", data = vegetation)
summary(M0_err)
##
## Call:
## c("inla(formula = f0_err, family = \"gamma\", data = vegetation, control.compute = list(dic = TRUE, ", " waic = TRUE))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 0.8861 4.9629 0.5647 6.4138
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## (Intercept) -0.9195 0.0333 -0.9845 -0.9196 -0.8539 -0.9199 0
## Pstd -0.9225 0.2001 -1.3161 -0.9222 -0.5306 -0.9217 0
## Hstd 0.0057 0.0356 -0.0640 0.0056 0.0757 0.0055 0
## I(Pstd^2) -0.3252 0.2293 -0.7740 -0.3257 0.1260 -0.3267 0
## I(Hstd^2) -0.0283 0.0110 -0.0497 -0.0284 -0.0065 -0.0286 0
## Diststd -0.1681 0.0145 -0.1966 -0.1681 -0.1395 -0.1681 0
## meandVstd -0.0354 0.0139 -0.0628 -0.0354 -0.0081 -0.0353 0
## Pstd:Hstd 0.1851 0.2010 -0.2116 0.1858 0.5775 0.1873 0
## Hstd:I(Pstd^2) -0.2571 0.2300 -0.7059 -0.2581 0.1967 -0.2600 0
## Pstd:I(Hstd^2) 0.0155 0.0290 -0.0411 0.0154 0.0728 0.0151 0
##
## The model has no random effects
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision parameter for the Gamma observations 2.60 0.0737 2.457
## 0.5quant 0.975quant mode
## Precision parameter for the Gamma observations 2.599 2.747 2.597
##
## Expected number of effective parameters(std dev): 10.03(0.001)
## Number of equivalent replicates : 214.97
##
## Deviance Information Criterion (DIC) ...............: -2195.14
## Deviance Information Criterion (DIC, saturated) ....: 2307.50
## Effective number of parameters .....................: 11.07
##
## Watanabe-Akaike information criterion (WAIC) ...: -2190.88
## Effective number of parameters .................: 14.78
##
## Marginal log-Likelihood: 1037.66
## Posterior marginals for linear predictor and fitted values computed
##Checking assumptions
#residuals
Fit0_diff <- M0_diff$summary.fitted.values[1:nrow(vegetation),"mean"]
E0_diff <- vegetation$mean_diff - Fit0_diff
Fit0_err <- M0_err$summary.fitted.values[1:nrow(vegetation),"mean"]
E0_err <- vegetation$sd_diff - Fit0_err
plot_assump <- function(Fit0, E0){
par(mfrow=c(2,2))
# Homogeneity
plot(x = Fit0, y = E0)
abline(h = 0, v = 0)
# Normality
hist(E0, breaks = 25)
# Independence due to model misfit
plot(x = vegetation$P, y = E0)
abline(h = 0)
plot(x=vegetation$Hstd, y = E0)
abline(h = 0)
}
plot_assump(Fit0_diff, E0_diff)
plot_assump(Fit0_err, E0_err)
#Variogram to look at spatial autocorrelation
##Semivariogram of residuals
vegetation <- vegetation %>% mutate(
mu0_diff = M0_diff$summary.fitted.values$mean,
Var0_diff = (M0_diff$summary.fitted.values$sd)^2,
Res_diff = mean_diff-mu0_diff, #Pearson's residuals, for Generalized LM, would be /sqrt(Var0)
mu0_err = M0_err$summary.fitted.values$mean,
Res_err = mean_diff-mu0_err)
V0_diff <- variogram(Res_diff ~ 1, locations = ~Xkm + Ykm,
data = as.data.frame(vegetation), cressie = TRUE,
cutoff=5)
V0_err <- variogram(Res_err ~ 1, locations = ~Xkm + Ykm,
data = as.data.frame(vegetation), cressie = TRUE,
cutoff=5)
plot_spatialres <- function(Res){
##Spatial pattern in residuals?
#plotting on map
MyCex <- 3*abs(Res)/max(Res) +0.5
Sign <- as.numeric(Res>=0)+1
MyPch <- c(1,16)[Sign]
xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
cex=MyCex, pch=MyPch, col=MyPch)
}
#spatial plot for mean_diff
plot(V0_diff)
plot_spatialres(Res=vegetation$Res_diff)
#spatial plot for sd_diff
plot(V0_err)
plot_spatialres(Res=vegetation$Res_err)
#fitted data vs real data
p_pred_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=mu0_diff))+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
ggtitle("Prediciton ~ Real value for mean_diff")
p_pred_err <- ggplot(data=vegetation, aes(x=sd_diff, y=exp(mu0_err)) )+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color="red")+
ggtitle("Prediciton ~ Real value for sd_diff")
par(mfrow=c(1,2))
p_pred_diff
p_pred_err
#Spatial model with INLA
#check distribution of distances between datapoints
Loc <- cbind(vegetation$Xkm, vegetation$Ykm)
D <- dist(Loc)
hist(D,freq = TRUE, main = "",
xlab = "Distance between sites (km)",
ylab = "Frequency")
abline(v = 50, lty = 2, col = 2)
plot(x = sort(D),
y = (1:length(D))/length(D),
type = "l",
xlab = "Distance between sites (km)",
ylab = "Cumulative proportion")
abline(h = 0.1, lty = 2, col = 2)
#make a mesh (to approximate spatial field on)
RangeGuess_diff <- 1
MaxEdge_diff <- RangeGuess_diff / 5
mesh_diff <- inla.mesh.2d(Loc,max.edge = c(3,5)*MaxEdge_diff, cutoff = MaxEdge_diff/5)#good way to start according to Zuur INLA book
plot(mesh_diff, asp=1)
mesh_diff$n
## [1] 2249
RangeGuess_err <- 1
MaxEdge_err <- RangeGuess_err/5
mesh_err <- inla.mesh.2d(Loc, max.edge = c(3,5)*MaxEdge_err, cutoff = MaxEdge_err/5)
mesh_err$n
## [1] 2249
#2. making a stack (to link mesh with data)
A_diff <- inla.spde.make.A(mesh_diff, loc = Loc)
A_err <- inla.spde.make.A(mesh_err, loc=Loc)
spde_diff <- inla.spde2.pcmatern(mesh_diff, prior.range = c(1, 0.5), prior.sigma = c(1.6, 0.05))#Differnce [-2.8,3.6] range/4
spde_err <- inla.spde2.pcmatern(mesh_err, prior.range = c(1, 0.5), prior.sigma = c(1.375, 0.05))#sd_diff [0.015, 5.5]
w1.index_diff <- inla.spde.make.index('w', n.spde = spde_diff$n.spde)
w1.index_err <- inla.spde.make.index('w', n.spde = spde_err$n.spde)
Xm <- model.matrix(~ Pstd * Hstd * Diststd*meandVstd, data = vegetation)
N <- nrow(vegetation)
X <- data.frame(Pstd = Xm[,2], Hstd = Xm[,3], Diststd= Xm[,4], meandVstd=Xm[,5])
Stack1_diff <- inla.stack(tag = "Fit", data = list(y = vegetation$mean_diff),
A = list(1, 1, A_diff), effects = list(Intercept = rep(1, N),
X = as.data.frame(X), w = w1.index_diff))
Stack1_err <- inla.stack(tag = "Fit", data = list(y = vegetation$sd_diff),
A = list(1, 1, A_err), effects = list(Intercept = rep(1, N),
X = as.data.frame(X), w = w1.index_err))
#predictions have to be calculated during model fitting with inla (inlabru can do it afterwards)
#predictions are calculated for a 'average' spatial position
#so A doesn't have the spatial components of the spde
Stackpred_diff <- inla.stack(tag = "Covariates", data = list(y = NA),
A = list(1, 1), effects = list(Intercept = rep(1, N),
Xp = as.data.frame(X)))
Stackpred_err <- inla.stack(tag = "Covariates", data = list(y = NA),
A = list(1, 1), effects = list(Intercept = rep(1, N),
Xp = as.data.frame(X)))
#interpolations for prediction surface
interpol_df <- tibble(Pstd=seq(0,1, by=0.01), H=seq(20,70,by=0.5)) %>%
tidyr::expand(Pstd, H) %>%
mutate(Hstd=scale(H, center=mean(vegetation$H), scale=sd(vegetation$H))) %>%
tibble::add_column(Diststd = mean(vegetation$Diststd)) %>%#give mean distHW
tibble::add_column(meandVstd = mean(vegetation$meandVstd))#give mean suppletion meandV
Xm_inter <- model.matrix(~ Pstd * Hstd * Diststd * meandVstd, data = interpol_df)
N_inter <- nrow(interpol_df)
X_inter <- data.frame(Pstd = Xm_inter[,2], Hstd = Xm_inter[,3], Diststd= Xm_inter[,4], meandVstd=Xm_inter[,5])
####TO ADD#####
Stackinter_diff <- inla.stack(tag= "Interpolations", data = list(y = NA),
A = list(1, 1), effects = list(Intercept = rep(1, N_inter),
Xi = as.data.frame(X_inter)))
Stackinter_err <- inla.stack(tag= "Interpolations", data = list(y = NA),
A = list(1, 1), effects = list(Intercept = rep(1, N_inter),
Xi = as.data.frame(X_inter)))
Stack_diff <- inla.stack(Stack1_diff, Stackpred_diff, Stackinter_diff)
Stack_err <- inla.stack(Stack1_err, Stackpred_err, Stackinter_err)
#3. specify model formula
f_diff <- y ~ -1 + Intercept + Pstd + Hstd + Pstd:Hstd + I(Pstd^2) + I(Hstd^2) +
Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd +
f(w, model=spde_diff)
f_err <- y ~ -1 + Intercept + Pstd + Hstd + Pstd:Hstd + I(Pstd^2) + I(Hstd^2) +
Hstd:I(Pstd^2) + Pstd:I(Hstd^2) + Diststd + meandVstd +
f(w, model=spde_err)
M1_diff <- inla(f_diff, family = "gaussian", data = inla.stack.data(Stack_diff),
control.compute = list(dic = TRUE, config=TRUE),
#control.family = list(hyper = list(prec = list(param = c(1, 0.2)))),
control.predictor = list(A = inla.stack.A(Stack_diff)))
M1_err <- inla(f_err, family = "gamma", data = inla.stack.data(Stack_err),
control.compute = list(dic = TRUE, config=TRUE), #verbose=TRUE,
control.predictor = list(A = inla.stack.A(Stack_err)))
summary(M1_diff)
##
## Call:
## c("inla(formula = f_diff, family = \"gaussian\", data = inla.stack.data(Stack_diff), ", " control.compute = list(dic = TRUE, config = TRUE), control.predictor = list(A = inla.stack.A(Stack_diff)))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 2.0614 139.6663 3.2018 144.9295
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## Intercept 0.2866 0.0936 0.1087 0.2835 0.4824 0.2782 0
## Pstd 0.7910 0.1195 0.5562 0.7910 1.0252 0.7912 0
## Hstd 0.0492 0.0190 0.0119 0.0492 0.0864 0.0492 0
## I(Pstd^2) -0.9358 0.1328 -1.1964 -0.9359 -0.6751 -0.9360 0
## I(Hstd^2) 0.0078 0.0061 -0.0041 0.0078 0.0197 0.0078 0
## Diststd -0.2258 0.0360 -0.2988 -0.2251 -0.1569 -0.2236 0
## meandVstd 0.0300 0.0435 -0.0571 0.0306 0.1136 0.0320 0
## Pstd:Hstd -0.2214 0.1056 -0.4288 -0.2214 -0.0142 -0.2215 0
## Hstd:I(Pstd^2) 0.0505 0.1211 -0.1874 0.0505 0.2882 0.0506 0
## Pstd:I(Hstd^2) -0.0367 0.0165 -0.0690 -0.0367 -0.0044 -0.0367 0
##
## Random effects:
## Name Model
## w SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 12.6147 0.4571 11.7350 12.6080
## Range for w 0.3157 0.0602 0.2176 0.3087
## Stdev for w 0.6254 0.0830 0.4831 0.6180
## 0.975quant mode
## Precision for the Gaussian observations 13.5348 12.5972
## Range for w 0.4532 0.2943
## Stdev for w 0.8086 0.6017
##
## Expected number of effective parameters(std dev): 381.11(13.70)
## Number of equivalent replicates : 5.66
##
## Deviance Information Criterion (DIC) ...............: 1041.87
## Deviance Information Criterion (DIC, saturated) ....: 2545.17
## Effective number of parameters .....................: 383.42
##
## Marginal log-Likelihood: -842.31
## Posterior marginals for linear predictor and fitted values computed
summary(M1_err)
##
## Call:
## c("inla(formula = f_err, family = \"gamma\", data = inla.stack.data(Stack_err), ", " control.compute = list(dic = TRUE, config = TRUE), control.predictor = list(A = inla.stack.A(Stack_err)))" )
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 1.2803 793.6152 2.7736 797.6690
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## Intercept -1.3858 0.0809 -1.5472 -1.3852 -1.2281 -1.3839 0
## Pstd 0.5340 0.1847 0.1709 0.5342 0.8961 0.5345 0
## Hstd 0.0298 0.0296 -0.0282 0.0298 0.0880 0.0298 0
## I(Pstd^2) -1.2180 0.2055 -1.6209 -1.2182 -0.8145 -1.2186 0
## I(Hstd^2) -0.0173 0.0093 -0.0354 -0.0174 0.0009 -0.0174 0
## Diststd -0.3364 0.0440 -0.4250 -0.3357 -0.2519 -0.3342 0
## meandVstd 0.0381 0.0524 -0.0656 0.0382 0.1406 0.0386 0
## Pstd:Hstd 0.0236 0.1653 -0.3016 0.0238 0.3474 0.0242 0
## Hstd:I(Pstd^2) -0.1194 0.1901 -0.4920 -0.1197 0.2543 -0.1203 0
## Pstd:I(Hstd^2) 0.0135 0.0255 -0.0364 0.0135 0.0636 0.0134 0
##
## Random effects:
## Name Model
## w SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision parameter for the Gamma observations 5.3746 0.1849 5.0183
## Range for w 0.1863 0.0230 0.1471
## Stdev for w 0.6939 0.0441 0.6127
## 0.5quant 0.975quant mode
## Precision parameter for the Gamma observations 5.3721 5.7462 5.3680
## Range for w 0.1842 0.2372 0.1792
## Stdev for w 0.6919 0.7863 0.6870
##
## Expected number of effective parameters(std dev): 387.90(13.50)
## Number of equivalent replicates : 5.561
##
## Deviance Information Criterion (DIC) ...............: -3526.99
## Deviance Information Criterion (DIC, saturated) ....: 2613.28
## Effective number of parameters .....................: 387.69
##
## Marginal log-Likelihood: 1469.07
## Posterior marginals for linear predictor and fitted values computed
#save the mesh and models to load later, as these models take long to run
#save(mesh_diff, file="output/mesh_diff.rda")
#save(mesh_err, file="output/mesh_err.rda")
#save(M1_diff, file="output/M1_diff.rda")
#save(M1_err, file="output/M1_err.rda")
###Plotting spatial autocorrelation of residuals
##Fitted values and residuals
#index of the fitted values
indexFit_diff <- inla.stack.index(Stack_diff, tag = "Fit")$data #fitted data (with SAC)
indexCov_diff <- inla.stack.index(Stack_diff, tag = "Covariates")$data #predictions
indexInt_diff <- inla.stack.index(Stack_diff, tag = "Interpolations")$data #interpolations
indexFit_err <- inla.stack.index(Stack_err, tag= "Fit")$data
indexCov_err <- inla.stack.index(Stack_err, tag="Covariates")$data
indexInt_err <- inla.stack.index(Stack_err, tag="Interpolations")$data
#Difference
vegetation$Fit_diff_sp <- M1_diff$summary.fitted.values[indexFit_diff,"mean"]
vegetation$Pred_diff_sp <- M1_diff$summary.fitted.values[indexCov_diff, "mean"]
vegetation$Res_diff_sp <- vegetation$mean_diff - vegetation$Fit_diff_sp
interpol_df$Int_diff_mean <- M1_diff$summary.fitted.values[indexInt_diff, "mean"]
interpol_df$Int_diff_sd <- M1_diff$summary.fitted.values[indexInt_diff, "sd"]
interpol_df$Int_diff_0.025quant <- M1_diff$summary.fitted.values[indexInt_diff, "0.025quant"]
interpol_df$Int_diff_0.975quant <- M1_diff$summary.fitted.values[indexInt_diff, "0.975quant"]
#Error
vegetation$Fit_err_sp <- M1_err$summary.fitted.values[indexFit_err, "mean"]
vegetation$Pred_err_sp <- M1_err$summary.fitted.values[indexCov_err, "mean"]
vegetation$Res_err_sp <- vegetation$sd_diff - vegetation$Fit_err_sp
interpol_df$Int_err_mean <- M1_err$summary.fitted.values[indexInt_err, "mean"]
interpol_df$Int_err_sd <- M1_diff$summary.fitted.values[indexInt_err, "sd"]
interpol_df$Int_err_0.025quant <- M1_err$summary.fitted.values[indexInt_err, "0.025quant"]
interpol_df$Int_err_0.975quant <- M1_err$summary.fitted.values[indexInt_err, "0.975quant"]
#calculate variogram
V0_diff <- variogram(Res_diff ~ 1, locations=~Xkm + Ykm,
data=as.data.frame(vegetation), cressie=T, cutoff=1)
V1_diff <- variogram(Res_diff_sp ~ 1, locations = ~Xkm + Ykm,
data = as.data.frame(vegetation), cressie = TRUE, cutoff=1)
V0_err <- variogram(Res_err ~ 1, locations=~Xkm + Ykm,
data=as.data.frame(vegetation), cressie=T, cutoff=1)
V1_err <- variogram(Res_err_sp ~ 1, locations = ~Xkm + Ykm,
data = as.data.frame(vegetation), cressie = TRUE, cutoff=1)
#plotting variograms
p_variogram_diff <- ggplot() +
geom_point(data = V0_diff, aes(x = dist, y = gamma)) +
geom_line(data = V0_diff, aes(x = dist, y = gamma),col = "red")+
geom_point(data = V1_diff,aes(x = dist, y = gamma)) +
geom_line(data = V1_diff,aes(x = dist, y = gamma),col = "blue")+
#scale_x_continuous(limits=c(0,50))+
ggtitle("Semi-variogram for mean_diff")+
xlab("Distance (km)") +
ylab("Cressie's Semi-variance") +
theme(text = element_text(size = 15)) +
theme_classic()
#theme(legend.position="none")
#ylim(0,1.5)
p_variogram_diff
p_variogram_err <- ggplot() +
geom_point(data = V0_err, aes(x = dist, y = gamma)) +
geom_line(data = V0_err, aes(x = dist, y = gamma),col = "red")+
geom_point(data = V1_err,aes(x = dist, y = gamma)) +
geom_line(data = V1_err,aes(x = dist, y = gamma),col = "blue")+
#scale_x_continuous(limits=c(0,50))+
ggtitle("Semi-variogram for sd_diff")+
xlab("Distance") +
ylab("Cressie's Semi-variance") +
theme(text = element_text(size = 15)) +
theme_classic()
#theme(legend.position="none")
#ylim(0,1.5)
p_variogram_err
#plotting spatial autocorrelation on map
Fit_diff_sp <- vegetation$Fit_diff_sp
Res_diff_sp <- vegetation$Res_diff_sp
MyCex <- 3*abs(Res_diff_sp)/max(Res_diff_sp) +0.5
Sign <- as.numeric(Res_diff_sp>=0)+1
MyPch <- c(1,16)[Sign]
xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
cex=MyCex, pch=MyPch, col=MyPch, main="Residuals Diff, color is sign, size~size")
Fit_err_sp <- vegetation$Fit_err_sp
Res_err_sp <- vegetation$Res_err_sp
MyCex <- 3*abs(Res_err_sp)/max(Res_err_sp) +0.5
Sign <- as.numeric(Res_err_sp>=0)+1
MyPch <- c(1,16)[Sign]
xyplot(Ykm~Xkm, data=vegetation, aspect="iso",
cex=MyCex, pch=MyPch, col=MyPch, main="Residuals sd_diff, color is sign, size~size")
###Checking assumptions for spatial model####
plot_assump <- function(Fit0, E0){
par(mfrow=c(2,2))
# Homogeneity
plot(x = Fit0, y = E0)
abline(h = 0, v = 0)
# Normality
hist(E0, breaks = 25)
# Independence due to model misfit
plot(x = vegetation$Pstd, y = E0)
abline(h = 0)
plot(x=vegetation$Hstd, y = E0)
abline(h = 0)
}
plot_assump(vegetation$Fit_diff_sp, vegetation$Res_diff_sp)
plot_assump(vegetation$Fit_err_sp, vegetation$Res_err_sp)
#Fitted data (with correction for SAR added to the data)
p_fit_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=Fit_diff_sp))+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
ggtitle("Fitted values ~ Real value for mean_diff")
p_fit_err <- ggplot(data= vegetation, aes(x=sd_diff, y=Fit_err_sp))+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
ggtitle("Fitted values ~ Real value for sd_diff")
p_fit_diff
p_fit_err
#predicted data (SAR not added, but only used to estimate the fixed effects; these are the kind of predictions possible for extrapolation)
p_pred_diff <- ggplot(data= vegetation, aes(x=mean_diff, y=Pred_diff_sp))+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
ggtitle("Predicted values ~ Real value for mean_diff")
p_pred_err <- ggplot(data= vegetation, aes(x=sd_diff, y=exp(Pred_err_sp)))+
geom_point()+
geom_abline(intercept=0, slope=1, linetype="longdash", color='red')+
ggtitle("Predicted values ~ Real value for sd_diff")
p_pred_diff
p_pred_err
#Regression coefficients
###mean_diff
post_beta0_diff <- M0_diff$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
post_beta1_diff <- M1_diff$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
NumberOfBetas <- nrow(M1_diff$summary.fixed)
Combined <- rbind(post_beta0_diff, post_beta1_diff)
Combined$WhichModel <- rep(c("Non-spatial", "Spatial"),each = NumberOfBetas)
Combined$WhichVariable <- rep(rownames(M1_diff$summary.fixed), 2)
colnames(Combined) <- c("Mean", "Lo", "Up", "WhichModel", "WhichVariable")
Combined
## Mean Lo Up WhichModel
## (Intercept) 0.340747255 0.295415160 0.386041368 Non-spatial
## Pstd 0.089522695 -0.186049188 0.364863640 Non-spatial
## Hstd 0.029203198 -0.018124738 0.076491476 Non-spatial
## I(Pstd^2) -0.427533574 -0.744693748 -0.110639131 Non-spatial
## I(Hstd^2) -0.003722869 -0.019001064 0.011542524 Non-spatial
## Diststd -0.149049481 -0.169063939 -0.129051792 Non-spatial
## meandVstd 0.084123643 0.065064650 0.103166666 Non-spatial
## Pstd:Hstd -0.202613227 -0.465863525 0.060416493 Non-spatial
## Hstd:I(Pstd^2) 0.071082343 -0.234386954 0.376295665 Non-spatial
## Pstd:I(Hstd^2) -0.033915739 -0.074401464 0.006536061 Non-spatial
## Intercept 0.286614928 0.108699935 0.482444004 Spatial
## Pstd1 0.790961761 0.556192338 1.025185413 Spatial
## Hstd1 0.049156647 0.011888611 0.086377262 Spatial
## I(Pstd^2)1 -0.935822224 -1.196438282 -0.675123385 Spatial
## I(Hstd^2)1 0.007784189 -0.004137936 0.019689792 Spatial
## Diststd1 -0.225802025 -0.298758515 -0.156891233 Spatial
## meandVstd1 0.029974004 -0.057117624 0.113551056 Spatial
## Pstd:Hstd1 -0.221429435 -0.428757480 -0.014198983 Spatial
## Hstd:I(Pstd^2)1 0.050524434 -0.187383620 0.288154076 Spatial
## Pstd:I(Hstd^2)1 -0.036712028 -0.069021629 -0.004430045 Spatial
## WhichVariable
## (Intercept) Intercept
## Pstd Pstd
## Hstd Hstd
## I(Pstd^2) I(Pstd^2)
## I(Hstd^2) I(Hstd^2)
## Diststd Diststd
## meandVstd meandVstd
## Pstd:Hstd Pstd:Hstd
## Hstd:I(Pstd^2) Hstd:I(Pstd^2)
## Pstd:I(Hstd^2) Pstd:I(Hstd^2)
## Intercept Intercept
## Pstd1 Pstd
## Hstd1 Hstd
## I(Pstd^2)1 I(Pstd^2)
## I(Hstd^2)1 I(Hstd^2)
## Diststd1 Diststd
## meandVstd1 meandVstd
## Pstd:Hstd1 Pstd:Hstd
## Hstd:I(Pstd^2)1 Hstd:I(Pstd^2)
## Pstd:I(Hstd^2)1 Pstd:I(Hstd^2)
p_posteriors_diff <- ggplot() +
geom_point(data = Combined, aes(x = WhichModel, y = Mean))+
geom_errorbar(data = Combined, aes(x = WhichModel, ymax = Up, ymin = Lo, color=WhichModel), width=0.2)+
xlab("Parameters") + ylab("Values")+
geom_hline(yintercept=0, linetype="dashed", color="red")+
theme(text = element_text(size = 15)) +
facet_wrap( ~ WhichVariable, scales = "free_y")+
theme(legend.position="none") +
ggtitle("Posteriors (effect sizes) for mean_diff")
p_posteriors_diff
p_diff <- ggplot(filter(Combined, WhichModel=="Spatial") %>% mutate(WhichVariable = fct_reorder(WhichVariable, Mean))) +
geom_pointrange() +
geom_hline(yintercept = 0, linetype = 2) +
aes(x = WhichVariable, y =Mean, ymin = Lo, ymax = Up) +
xlab("covariate")+ ylab("Effect size")+
ggtitle("Posteriors (effect sizes) for mean_diff")+
#
# scale_color_manual(values = c("SLA1" = "#66CD00",
# "SLA2" = "#cfefb0")) +
theme_classic() +
coord_flip()
p_diff
###sd_diff
post_beta0_err <- M0_err$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
post_beta1_err <- M1_err$summary.fixed[, c("mean", "0.025quant", "0.975quant")]
NumberOfBetas <- nrow(M1_err$summary.fixed)
Combined <- rbind(post_beta0_err, post_beta1_err)
Combined$WhichModel <- rep(c("Non-spatial", "Spatial"),each = NumberOfBetas)
Combined$WhichVariable <- rep(rownames(M1_err$summary.fixed), 2)
colnames(Combined) <- c("Mean", "Lo", "Up", "WhichModel", "WhichVariable")
# Combined
p_posteriors_err <- ggplot() +
geom_point(data = Combined, aes(x = WhichModel, y = Mean))+
geom_errorbar(data = Combined, aes(x = WhichModel, ymax = Up, ymin = Lo, color=WhichModel), width=0.2)+
xlab("Parameters") + ylab("Values")+
geom_hline(yintercept=0, linetype="dashed", color="red")+
theme(text = element_text(size = 15)) +
facet_wrap( ~ WhichVariable, scales = "free_y")+
theme(legend.position="none")+
ggtitle("Posteriors (effect sizes) for sd_diff")
p_posteriors_err
p_err <- ggplot(filter(Combined, WhichModel=="Spatial") %>% mutate(WhichVariable = fct_reorder(WhichVariable, Mean))) +
geom_pointrange() +
geom_hline(yintercept = 0, linetype = 2) +
aes(x = WhichVariable, y =Mean, ymin = Lo, ymax = Up) +
xlab("covariate")+ ylab("Effect size")+
ggtitle("Posteriors (effect sizes) for sd_diff")+
#
# scale_color_manual(values = c("SLA1" = "#66CD00",
# "SLA2" = "#cfefb0")) +
theme_classic() +
coord_flip()
p_err
#Plots of spatial field
###Spatial field plots####
#contour
DSN <- "data/Study_Area_westkust.shp"
boundary <- readOGR(dsn = DSN, layer = "Study_Area_westkust")
## OGR data source with driver: ESRI Shapefile
## Source: "D:\fbatslee\OneDrive - UGent\Endure\DuneTopoMarram\data\Study_Area_westkust.shp", layer: "Study_Area_westkust"
## with 1 features
## It has 1 fields
## Integer64 fields read as strings: id
x1 <- c(22100, 22100, 39900, 39900)
y1 <- c(198700, 209000, 209000, 198700)
# Make a polygon of it.
AreaPoly <- Polygon(cbind(x1, y1), hole = FALSE)
AreaSP <- SpatialPolygons(list(Polygons(list(AreaPoly), ID = '1')))
AreaSP@proj4string <- boundary@proj4string
#define what outside the raster
Outraster <- gDifference(AreaSP, boundary)
#plot(Outraster)
##Spatial field for mean_diff##
w.pm <- M1_diff$summary.random$w$mean
w.proj <- inla.mesh.projector(mesh_diff)
w.pmf <- inla.mesh.project(w.proj, w.pm)
xygrid <- expand.grid(w.proj$x, w.proj$y)
Data3D <- data.frame(x = xygrid[,1],
y = xygrid[,2],
z = melt(w.pmf)[,3])
names(Data3D) <- c("x", "y", "z")
#Data3D <- Data3D %>% filter((x>=22.100 & x<=81.200) & (y>=197.600 & y<=229.800))
p_sf_diff <- ggplot(Data3D,
aes(x, y, z = z),
col = rgb(1, 0.5, 0.5, 0.7)) +
stat_contour(geom="polygon", aes(fill = ..level..)) +
geom_raster(aes(fill = z)) +
labs(fill="w.pm")+
# xlim(23231,23332)+
# ylim(197851,197902)+
geom_polygon(data=fortify(Outraster), aes(x=long/1000, y=lat/1000),
#fill="white",
alpha=0.5,
color="white", inherit.aes=FALSE)+
#geom_polygon(data=fortify(Outraster), aes(x=lat, y=long, z=NULL), color="black", fill=NA)+
#stat_contour(geom="polygon", aes(fill = ..level..)) +
#stat_contour(aes(colour = ..level..))+
xlab("x (Km)")+
ylab("y (Km)")+
xlim(c(22.100, 39.900))+
ylim(c(198.700, 209.000))+
scale_fill_distiller(palette="PuOr")+
theme_classic()+
coord_fixed(ratio = 1)
p_sf_diff
## Warning: Removed 6488 rows containing non-finite values (stat_contour).
## Warning: Removed 6315 rows containing missing values (geom_raster).
##Spatial field for sd_diff#
w.pm <- M1_err$summary.random$w$mean
w.proj <- inla.mesh.projector(mesh_err)
w.pmf <- inla.mesh.project(w.proj, w.pm)
xygrid <- expand.grid(w.proj$x, w.proj$y)
Data3D <- data.frame(x = xygrid[,1],
y = xygrid[,2],
z = melt(w.pmf)[,3])
names(Data3D) <- c("x", "y", "z")
p_sf_err <- ggplot(Data3D,
aes(x, y, z = z),
col = rgb(1, 0.5, 0.5, 0.7)) +
stat_contour(geom="polygon", aes(fill = ..level..)) +
geom_raster(aes(fill = z)) +
labs(fill="w.pm")+
# xlim(23231,23332)+
# ylim(197851,197902)+
geom_polygon(data=fortify(Outraster), aes(x=long/1000, y=lat/1000),
#fill="white",
alpha=0.5,
color="white", inherit.aes=FALSE)+
#geom_polygon(data=fortify(Outraster), aes(x=lat, y=long, z=NULL), color="black", fill=NA)+
#stat_contour(geom="polygon", aes(fill = ..level..)) +
#stat_contour(aes(colour = ..level..))+
xlab("x (m)")+
ylab("y (m)")+
xlim(c(22.100, 39.900))+
ylim(c(198.700, 209.000))+
scale_fill_distiller(palette="PuOr")+
theme_classic()+
coord_fixed(ratio = 1)
p_sf_err
## Warning: Removed 6488 rows containing non-finite values (stat_contour).
## Warning: Removed 6315 rows containing missing values (geom_raster).
#Predictions ~ raw data of H and P
#####Predicties ~ covariates
p_pred_diff_P <- ggplot(data= vegetation, aes(x=P, y=Pred_diff_sp))+
geom_point()+
ggtitle("Predicition Diff ~ P")
p_pred_diff_H <- ggplot(data= vegetation, aes(x=H, y=Pred_diff_sp))+
geom_point()+
ggtitle("Predicitions Diff ~ H")
p_pred_err_P <- ggplot(data= vegetation, aes(x=P, y=exp(Pred_err_sp)))+
geom_point()+
ggtitle("Predicitions Error ~ P")
p_pred_err_H <- ggplot(data= vegetation, aes(x=H, y=exp(Pred_err_sp)))+
geom_point()+
ggtitle("Predicitions Error ~ H")
grid.arrange(p_pred_diff_P, p_pred_diff_H, p_pred_err_P,p_pred_err_H, nrow=2, ncol=2)
##3D plots
plot_3d_diff <- plot_ly(vegetation,
x = ~P, y=~H, z =~Pred_diff_sp,
color=~Fit_diff_sp) %>%
add_markers() %>%
layout(title = "Predicted values Difference",
scene = list(xaxis=list(title="P"),
yaxis=list(title="H"),
zaxis=list(title="Pred Difference")))
plot_3d_diff
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
plot_3d_err <- plot_ly(vegetation,
x = ~P, y=~H, z =~Pred_err_sp,
color=~Fit_err_sp) %>%
add_markers() %>%
layout(title = "Predicted values Error",
scene = list(xaxis=list(title="P"),
yaxis=list(title="H"),
zaxis=list(title="Pred Error")))
plot_3d_err
#Plots for interpolations
p_int_diff_P <- ggplot(data= filter(interpol_df, H==32.5 |H==57.5),
aes(x=Pstd, y=Int_diff_mean, col=as.factor(H)))+
geom_point()+
ggtitle("Interpolations Diff ~ P")
p_int_diff_H <- ggplot(data= filter(interpol_df, Pstd==0.25 |Pstd==0.75),
aes(x=H, y=Int_diff_mean, col=as.factor(Pstd)))+
geom_point()+
ggtitle("Interpolations Diff ~ H")
p_int_err_P <- ggplot(data= filter(interpol_df, H==32.5 |H==57.5),
aes(x=Pstd, y=exp(Int_err_mean), col=as.factor(H) ))+
geom_point()+
ggtitle("Interpolations Error ~ P")
p_int_err_H <- ggplot(data= filter(interpol_df, Pstd==0.25 |Pstd==0.75),
aes(x=H, y=exp(Int_err_mean), col=as.factor(Pstd)))+
geom_point()+
ggtitle("Interpolations Error ~ H")
grid.arrange(p_int_diff_P, p_int_diff_H, p_int_err_P,p_int_err_H, nrow=2, ncol=2)
#3Dplot
Interpol_diff_3D <- plot_ly(data = interpol_df, x = ~H, y = ~Pstd, z=~Int_diff_mean, color=~Int_diff_mean) %>% add_markers() %>%
layout(title = "Change (Difference) in dune height",
scene = list(
xaxis = list(title = "JC"),
yaxis = list(title = "P"),
zaxis = list(title = "Height change (m)")))
Interpol_diff_3D
Interpol_err_3D <- plot_ly(data = interpol_df, x = ~H, y = ~Pstd, z=~exp(Int_err_mean), color=~exp(Int_err_mean)) %>% add_markers() %>%
layout(title = "Variability in change (Error) in dune height",
scene = list(
xaxis = list(title = "JC"),
yaxis = list(title = "P"),
zaxis = list(title = "Var height change (m)")))
Interpol_err_3D
#write.csv(interpol_df, "output/Interpolations_INLA_fielddata.csv")
#write.csv(vegetation, "output/Data_and_predictions_INLA_fielddata.csv")